Load occurrence dataset

Empirical fossil sampling rates

Distribution over the species lifetime

Check that this is not simply due to the central limit theorem (using artificial uniform distributions).

To explore the fossil sampling heterogeneity only keep occurrences within the species range.

Controls of fossil sampling rates among species

Continuous factors

Categorical factors

Functional data

## 
## FALSE  TRUE 
##     2   368
## [1] "Globoconella miotumida"     "Neogloboquadrina atlantica"
## 
## FALSE  TRUE 
##    85   368

Functional groups are only defined for Macroperforate foraminifera. Add an additional morphogroup for Microperforate foraminifera.

## 
## Call:
## lm(formula = sampling_rate ~ relevel(morphogroup, ref = "2"), 
##     data = sampling_heterogeneity)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.268 -15.546  -5.154  10.773 127.271 
## 
## Coefficients:
##                                               Estimate Std. Error t value
## (Intercept)                                    23.1002     2.4715   9.347
## relevel(morphogroup, ref = "2")NA             -11.4790     5.5570  -2.066
## relevel(morphogroup, ref = "2")1              -19.4626     9.7615  -1.994
## relevel(morphogroup, ref = "2")3                5.2506     4.7563   1.104
## relevel(morphogroup, ref = "2")4               11.0408     7.4600   1.480
## relevel(morphogroup, ref = "2")5              -16.6040    10.8435  -1.531
## relevel(morphogroup, ref = "2")6              -12.8679     9.7615  -1.318
## relevel(morphogroup, ref = "2")7                9.1462     3.9751   2.301
## relevel(morphogroup, ref = "2")8              -20.2430    21.2603  -0.952
## relevel(morphogroup, ref = "2")9               -5.1970     9.7615  -0.532
## relevel(morphogroup, ref = "2")10             -16.1625     8.3550  -1.934
## relevel(morphogroup, ref = "2")11             -21.5617    21.2603  -1.014
## relevel(morphogroup, ref = "2")12               5.4326     7.8641   0.691
## relevel(morphogroup, ref = "2")13              -0.8358     5.8289  -0.143
## relevel(morphogroup, ref = "2")14               0.8393     5.1357   0.163
## relevel(morphogroup, ref = "2")15              -1.8244     6.5777  -0.277
## relevel(morphogroup, ref = "2")16              -2.1235     7.4600  -0.285
## relevel(morphogroup, ref = "2")17               1.7028     4.3844   0.388
## relevel(morphogroup, ref = "2")18               7.2789     6.5777   1.107
## relevel(morphogroup, ref = "2")19               4.3778     7.8641   0.557
## relevel(morphogroup, ref = "2")Microperforate  -5.9270     3.9240  -1.510
##                                               Pr(>|t|)    
## (Intercept)                                     <2e-16 ***
## relevel(morphogroup, ref = "2")NA               0.0396 *  
## relevel(morphogroup, ref = "2")1                0.0469 *  
## relevel(morphogroup, ref = "2")3                0.2704    
## relevel(morphogroup, ref = "2")4                0.1398    
## relevel(morphogroup, ref = "2")5                0.1266    
## relevel(morphogroup, ref = "2")6                0.1883    
## relevel(morphogroup, ref = "2")7                0.0220 *  
## relevel(morphogroup, ref = "2")8                0.3417    
## relevel(morphogroup, ref = "2")9                0.5948    
## relevel(morphogroup, ref = "2")10               0.0539 .  
## relevel(morphogroup, ref = "2")11               0.3112    
## relevel(morphogroup, ref = "2")12               0.4901    
## relevel(morphogroup, ref = "2")13               0.8861    
## relevel(morphogroup, ref = "2")14               0.8703    
## relevel(morphogroup, ref = "2")15               0.7817    
## relevel(morphogroup, ref = "2")16               0.7761    
## relevel(morphogroup, ref = "2")17               0.6980    
## relevel(morphogroup, ref = "2")18               0.2692    
## relevel(morphogroup, ref = "2")19               0.5781    
## relevel(morphogroup, ref = "2")Microperforate   0.1318    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.12 on 349 degrees of freedom
## Multiple R-squared:  0.1018, Adjusted R-squared:  0.05031 
## F-statistic: 1.977 on 20 and 349 DF,  p-value: 0.007872
## 
## Call:
## lm(formula = sampling_rate ~ relevel(ecogroup, ref = "1"), data = sampling_heterogeneity)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.442 -15.265  -6.601  13.137 128.785 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      28.262      2.062  13.705  < 2e-16 ***
## relevel(ecogroup, ref = "1")NA  -12.603      3.319  -3.797 0.000172 ***
## relevel(ecogroup, ref = "1")2    -7.935      4.000  -1.984 0.048059 *  
## relevel(ecogroup, ref = "1")3    -2.153      2.984  -0.722 0.470994    
## relevel(ecogroup, ref = "1")4   -10.284      3.477  -2.958 0.003299 ** 
## relevel(ecogroup, ref = "1")5    11.775     10.765   1.094 0.274774    
## relevel(ecogroup, ref = "1")6   -20.267     10.765  -1.883 0.060538 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.13 on 363 degrees of freedom
## Multiple R-squared:  0.06442,    Adjusted R-squared:  0.04896 
## F-statistic: 4.166 on 6 and 363 DF,  p-value: 0.0004588

Correlations between categorical factors

Generalized Linear and Additive Models (with Gamma distribution)

## quartz_off_screen 
##                 2

-> A Gamma family seems to be a better fit

## 
## Call:
## glm(formula = sampling_rate ~ rel.abun + abs(pal.lat) + pal.long + 
##     age + sample.depth + pal.absLat.var + pal.long.var, family = Gamma(link = "log"), 
##     data = sampling_heterogeneity[sampling_heterogeneity$rel.abun < 
##         25, ])
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.1408310  0.1764896   6.464 3.52e-10 ***
## rel.abun        0.1113350  0.0124575   8.937  < 2e-16 ***
## abs(pal.lat)   -0.0054140  0.0029380  -1.843 0.066232 .  
## pal.long       -0.0009055  0.0005529  -1.638 0.102388    
## age             0.0064433  0.0030998   2.079 0.038398 *  
## sample.depth   -0.0016615  0.0004803  -3.460 0.000609 ***
## pal.absLat.var -0.0008867  0.0004294  -2.065 0.039667 *  
## pal.long.var    2.2019945  0.2012628  10.941  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.7073319)
## 
##     Null deviance: 427.27  on 349  degrees of freedom
## Residual deviance: 279.56  on 342  degrees of freedom
##   (16 observations deleted due to missingness)
## AIC: 2778.3
## 
## Number of Fisher Scoring iterations: 8
## 
## Call:
## glm(formula = sampling_rate ~ rel.abun + abs(pal.lat) + pal.long + 
##     age + sample.depth + pal.absLat.var + pal.long.var + reason + 
##     age.calc + morphogroup + ecogroup, family = Gamma(link = "log"), 
##     data = sampling_heterogeneity[sampling_heterogeneity$rel.abun < 
##         25, ])
## 
## Coefficients: (1 not defined because of singularities)
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                1.1415025  0.2428189   4.701 3.90e-06 ***
## rel.abun                   0.1105284  0.0122961   8.989  < 2e-16 ***
## abs(pal.lat)              -0.0052771  0.0027609  -1.911   0.0569 .  
## pal.long                  -0.0006712  0.0004945  -1.357   0.1757    
## age                        0.0052764  0.0034019   1.551   0.1219    
## sample.depth              -0.0021961  0.0004482  -4.900 1.55e-06 ***
## pal.absLat.var            -0.0003828  0.0004140  -0.925   0.3558    
## pal.long.var               1.7322116  0.2066426   8.383 1.82e-15 ***
## reasonBiostratigraphy      0.1944948  0.1049871   1.853   0.0649 .  
## reasonCommunity analysis  -0.1901695  0.2155368  -0.882   0.3783    
## reasonSelected species    -0.0562383  0.2641076  -0.213   0.8315    
## age.calcModel             -0.6644389  0.1217975  -5.455 9.99e-08 ***
## age.calcZone              -0.9217861  0.3822318  -2.412   0.0165 *  
## age.calcInterp            -2.0249430  0.3092892  -6.547 2.42e-10 ***
## age.calcMagneto           -3.4595271  0.4652247  -7.436 1.01e-12 ***
## morphogroup1              -1.2810255  0.5806592  -2.206   0.0281 *  
## morphogroup2              -0.4635724  0.4335480  -1.069   0.2858    
## morphogroup3               0.0146053  0.4903191   0.030   0.9763    
## morphogroup4               0.4160399  0.5228461   0.796   0.4268    
## morphogroup5              -1.0172240  0.6290322  -1.617   0.1069    
## morphogroup6              -0.4521186  0.5401644  -0.837   0.4032    
## morphogroup7              -0.3567651  0.4539062  -0.786   0.4325    
## morphogroup8              -0.6421687  0.8720130  -0.736   0.4620    
## morphogroup9              -0.3533097  0.5913296  -0.597   0.5506    
## morphogroup10             -1.2914789  0.5412941  -2.386   0.0176 *  
## morphogroup12              0.0121785  0.5208390   0.023   0.9814    
## morphogroup13             -0.1558879  0.4863852  -0.321   0.7488    
## morphogroup14              0.0157533  0.4654325   0.034   0.9730    
## morphogroup15             -0.0388340  0.4966204  -0.078   0.9377    
## morphogroup16             -0.6133349  0.5036000  -1.218   0.2242    
## morphogroup17             -0.0991667  0.4926373  -0.201   0.8406    
## morphogroup18             -0.1142061  0.5365418  -0.213   0.8316    
## morphogroup19             -0.3300354  0.5495653  -0.601   0.5486    
## morphogroupMicroperforate  0.3641262  0.2241514   1.624   0.1053    
## ecogroup1                  0.6459100  0.4243819   1.522   0.1290    
## ecogroup2                  0.9589251  0.4176330   2.296   0.0223 *  
## ecogroup3                  0.6903083  0.4022405   1.716   0.0871 .  
## ecogroup4                  0.5516943  0.4060506   1.359   0.1752    
## ecogroup5                  1.1783699  0.5702967   2.066   0.0396 *  
## ecogroup6                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.5355358)
## 
##     Null deviance: 427.27  on 349  degrees of freedom
## Residual deviance: 199.89  on 311  degrees of freedom
##   (16 observations deleted due to missingness)
## AIC: 2710.2
## 
## Number of Fisher Scoring iterations: 11

## Model matrix is rank deficient. Parameters `ecogroup6` were not
##   estimable.

## 
## Family: Gamma 
## Link function: log 
## 
## Formula:
## sampling_rate ~ s(pal.lat, pal.long, bs = "sos", k = 40) + s(rel.abun, 
##     k = 3) + s(pal.absLat.var, k = 3) + s(pal.long.var, k = 3) + 
##     te(age, sample.depth, k = 4) + reason + age.calc + morphogroup + 
##     ecogroup
## 
## Parametric coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                2.53239    0.18784  13.482  < 2e-16 ***
## reasonBiostratigraphy      0.24683    0.11040   2.236   0.0261 *  
## reasonCommunity analysis  -0.09319    0.22284  -0.418   0.6761    
## reasonSelected species    -0.03980    0.25572  -0.156   0.8764    
## age.calcModel             -0.71605    0.11946  -5.994 6.02e-09 ***
## age.calcZone              -0.90454    0.36558  -2.474   0.0139 *  
## age.calcInterp            -1.87800    0.30506  -6.156 2.46e-09 ***
## age.calcMagneto           -3.41394    0.45613  -7.485 8.53e-13 ***
## morphogroup1              -1.01378    0.80266  -1.263   0.2076    
## morphogroup2              -0.33954    0.72922  -0.466   0.6418    
## morphogroup3               0.01080    0.75501   0.014   0.9886    
## morphogroup4               0.30426    0.77339   0.393   0.6943    
## morphogroup5              -0.73563    0.83841  -0.877   0.3810    
## morphogroup6              -0.34392    0.83590  -0.411   0.6811    
## morphogroup7              -0.17911    0.72863  -0.246   0.8060    
## morphogroup8               0.00000    0.00000     NaN      NaN    
## morphogroup9              -0.56360    0.81571  -0.691   0.4902    
## morphogroup10             -1.30969    0.78398  -1.671   0.0959 .  
## morphogroup12              0.33560    0.75950   0.442   0.6589    
## morphogroup13             -0.07910    0.74469  -0.106   0.9155    
## morphogroup14              0.15824    0.72598   0.218   0.8276    
## morphogroup15              0.19555    0.75417   0.259   0.7956    
## morphogroup16             -0.32805    0.75809  -0.433   0.6655    
## morphogroup17             -0.15920    0.75459  -0.211   0.8331    
## morphogroup18             -0.07157    0.77372  -0.093   0.9264    
## morphogroup19             -0.33954    0.78922  -0.430   0.6673    
## morphogroupMicroperforate  0.50803    0.21960   2.313   0.0214 *  
## ecogroup1                  0.60117    0.75446   0.797   0.4262    
## ecogroup2                  0.85344    0.75341   1.133   0.2582    
## ecogroup3                  0.61417    0.73598   0.834   0.4047    
## ecogroup4                  0.40941    0.75028   0.546   0.5857    
## ecogroup5                  0.89907    0.83381   1.078   0.2818    
## ecogroup6                 -0.06011    0.83734  -0.072   0.9428    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                            edf Ref.df      F  p-value    
## s(pal.lat,pal.long)  1.796e+01     39  1.538 1.05e-06 ***
## s(rel.abun)          1.856e+00      2 29.043  < 2e-16 ***
## s(pal.absLat.var)    4.451e-04      2  0.000    0.496    
## s(pal.long.var)      9.835e-01      2 31.308  < 2e-16 ***
## te(age,sample.depth) 5.449e+00     15  1.848 5.70e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Rank: 92/93
## R-sq.(adj) =  0.332   Deviance explained = 62.2%
## -REML = 1338.7  Scale est. = 0.46559   n = 350

## quartz_off_screen 
##                 2
## quartz_off_screen 
##                 2
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors

Remove nonsignificant variables

## Warning in AIC.default(sampling_heterogeneity_GLM,
## sampling_heterogeneity_GLM_simplified, : models are not all fitted to the same
## number of observations
## Warning in BIC.default(sampling_heterogeneity_GLM,
## sampling_heterogeneity_GLM_simplified, : models are not all fitted to the same
## number of observations

Deviance partitioning

Derive the respective contribution of all the predictors to the explained variance.

## $Explained.deviance
## [1] 0.6223479
## 
## $hierarchical.partitioning
##                                   Unique Average.share Individual I.perc(%)
## s(pal.lat,pal.long,bs="sos",k=40) 0.0629        0.0624     0.1253     20.13
## s(rel.abun,k=3)                   0.0438        0.0480     0.0918     14.75
## s(pal.absLat.var,k=3)             0.0000        0.0231     0.0231      3.71
## s(pal.long.var,k=3)               0.0525        0.0524     0.1049     16.86
## te(age,sample.depth,k=4)          0.0176        0.0495     0.0671     10.78
## reason                            0.0117       -0.0010     0.0107      1.72
## age.calc                          0.0922        0.0366     0.1288     20.70
## morphogroup                       0.0376        0.0145     0.0521      8.37
## ecogroup                          0.0071        0.0114     0.0185      2.97
## 
## $variables
## [1] "s(pal.lat,pal.long,bs=\"sos\",k=40)" "s(rel.abun,k=3)"                    
## [3] "s(pal.absLat.var,k=3)"               "s(pal.long.var,k=3)"                
## [5] "te(age,sample.depth,k=4)"            "reason"                             
## [7] "age.calc"                            "morphogroup"                        
## [9] "ecogroup"                           
## 
## $type
## [1] "hierarchical.partitioning"
## 
## attr(,"class")
## [1] "gamhp"

Generate LaTeX snippet from GAM results